Colors

olive  = c("#34431A", "#637430", "#949F37", "#C0BF31", "#D8D466", "#EFEDB0")
green  = c("#18391A", "#246D36", "#458D41", "#62AA46", "#9DC376", "#D8E3C3")
teal   = c("#01354A", "#006474", "#14939A", "#4FB3B3", "#94CCCD", "#CBE2E8")
blue   = c("#192C57", "#0A4B83", "#056CA7", "#5791C3", "#9CC3E2", "#C7E2F5")
purple = c("#46174D", "#76266D", "#A04A8B", "#B276AA", "#CEA4C8", "#E8D3E6")
red    = c("#711311", "#972622", "#BD3B3B", "#D56362", "#E6A0A2", "#F5CCC8")
orange = c("#7F3218", "#AF4E24", "#E06B25", "#ED9646", "#F6BC7D", "#F9DCBC")
yellow = c("#685221", "#99752B", "#C69D2B", "#E3C24F", "#F1D687", "#FBEDC0")
mud    = c("#615C48", "#858062", "#A7A083", "#C6BFA3", "#E4DDCA", "#F6F3EC")
grey   = c("#1F2E43", "#47536B", "#6D788C", "#98A1B0", "#C8CCD8", "#E4E3E9")
skin   = c("#422B1D", "#744D3C", "#906753", "#BC9578", "#DBB9A0", "#F5E2D3")


fill_colors <- c(
    # 4d cyto
    "4d_CNT_cyto_1" = olive[2], "4d_OPTN_cyto_1" = orange[2],
    "4d_CNT_cyto_2" = olive[2], "4d_OPTN_cyto_2" = orange[2],
    "4d_CNT_cyto_3" = olive[2], "4d_OPTN_cyto_3" = orange[2],
    # 4d synap
    "4d_CNT_synap_1" = green[2], "4d_OPTN_synap_1" = red[2],
    "4d_CNT_synap_2" = green[2], "4d_OPTN_synap_2" = red[2],
    "4d_CNT_synap_3" = green[2], "4d_OPTN_synap_3" = red[2],
    # 5d synap
    "5d_CNT_cyto_1" = olive[3], "5d_OPTN_cyto_1" = orange[3],
    "5d_CNT_cyto_2" = olive[3], "5d_OPTN_cyto_2" = orange[3],
    "5d_CNT_cyto_3" = olive[3], "5d_OPTN_cyto_3" = orange[3],
    # 5d synap
    "5d_CNT_synap_1" = green[3], "5d_OPTN_synap_1" = red[3],
    "5d_CNT_synap_2" = green[3], "5d_OPTN_synap_2" = red[3],
    "5d_CNT_synap_3" = green[3], "5d_OPTN_synap_3" = red[3],
    # 6d synap
    "6d_CNT_cyto_1" = olive[4], "6d_OPTN_cyto_1" = orange[4],
    "6d_CNT_cyto_2" = olive[4], "6d_OPTN_cyto_2" = orange[4],
    "6d_CNT_cyto_3" = olive[4], "6d_OPTN_cyto_3" = orange[4],
    # 6d synap
    "6d_CNT_synap_1" = green[4], "6d_OPTN_synap_1" = red[4],
    "6d_CNT_synap_2" = green[4], "6d_OPTN_synap_2" = red[4],
    "6d_CNT_synap_3" = green[4], "6d_OPTN_synap_3" = red[4]
  )

QCfill = grey[3]
screefill = grey[3]
downfill = purple[1]
upfill = blue[1]
nsfill = grey[5]
vennfillup   = c("4d up cyto"    = blue[5],   "4d up synap"   = teal[5],
                 "5d up cyto"    = blue[6],   "5d up synap"   = teal[6])
vennfilldown = c("4d down cyto"  = purple[5], "4d down synap" = yellow[5],
                 "5d down cyto"  = purple[6], "5d down synap" = yellow[6])

Packages

options(repos = c(CRAN = "https://cloud.r-project.org/"))


required_pkgs <- c("tidyverse",
                   "tidyr",
                   "dplyr",
                   "BiocManager",
                   "readr",
                   "stringr",
                   "plotly",
                   "ggplot2",
                   "patchwork",
                   "grid",
                   "knitr",
                   "sysfonts",
                   "showtext",
                   "htmlwidgets",
                   "UpSetR",
                   "eulerr",
                   "tibble",
                   "purrr",
                   "ggrepel")

bioc_pkgs <- c("limma",
               "impute",
               "UniProt.ws",
               "pcaMethods", 
               "orthogene")


#CRAN packages
for (pkg in required_pkgs) {
  if (!requireNamespace(pkg, quietly = TRUE)) {
    install.packages(pkg, dependencies = TRUE)
  }
  library(pkg, character.only = TRUE)
}


#Bioconductor packages
if (!requireNamespace("BiocManager", quietly = TRUE)) {
  install.packages("BiocManager")
}

for (pkg in bioc_pkgs) {
  if (!requireNamespace(pkg, quietly = TRUE)) {
    BiocManager::install(pkg, ask = FALSE, update = FALSE)
  }
  library(pkg, character.only = TRUE)
}
## 
##   Es gibt eine Binärversion, jedoch ist der Quelltext neuer:
##           binary source needs_compilation
## orthogene 1.16.0 1.16.1             FALSE

Read in raw data and tidy up for use

raw <- read.delim("4d_5d_normalized_prot.tsv", sep = "\t", header = TRUE)
all_cols <- colnames(raw)

parsed <- purrr::map_dfr(all_cols, parse_colname)

raw_renamed <- raw
idx <- match(parsed$old_name, names(raw_renamed))
names(raw_renamed)[idx] <- parsed$new_name
anno <- annotate_proteins_uniprot(raw_df = raw_renamed,
                                  tax_id = 10090,
                                  remove_suffix = "_MOUSE$",
                                  proteinID = "PG.ProteinAccessions")
data <- anno$data
mapping <- anno$mapping   # optional backup / diagnostics
# GLOBAL
global_clean <- function(data) {

  message("Handling duplicate / missing gene names\n")
  duplicates <- data[duplicated(data$Gene.Names) |
                       duplicated(data$Gene.Names, fromLast = TRUE), ]
  duplicated_genes <- unique(data$Gene.Names[duplicated(data$Gene.Names)])
  message("This dataset contains ", nrow(duplicates),
          " duplicates in Gene.Names\n")
  if (length(duplicated_genes) > 0) {
    message("Duplicated gene names: ",
            paste(duplicated_genes, collapse = ", "), "\n")
  }
  
  ix <- as.integer(rownames(duplicates))
  data$Gene.Names[ix] <- data$Entry.Name[ix]
  rownames(data) <- data$Gene.Names
  
  message("-------------------------------------------------------------")
  message("This dataset contains ", nrow(data), " proteins\n")
  message("-------------------------------------------------------------\n")
  
  message("Removing contaminations\n")
  
  keratins <- grep("keratin", data$descriptions, ignore.case = TRUE)
  if (length(keratins) > 0) data <- data[-keratins, ]
  message(length(keratins), " keratin proteins removed\n")
  
  album <- grep("album", data$descriptions, ignore.case = TRUE)
  if (length(album) > 0) data <- data[-album, ]
  message(length(album), " albumin proteins removed\n")
  
  trypsin <- grep("trypsin", data$descriptions, ignore.case = TRUE)
  if (length(trypsin) > 0) data <- data[-trypsin, ]
  message(length(trypsin), " trypsin proteins removed\n\n")
  
  return(data)
}


# SUBSETS

clean_subset <- function(data,
                         batch,
                         fraction,
                         metric = "MS1",
                         namecols = c("Entry.Name",
                                      "Gene.Names",
                                      "descriptions",
                                      "PG.Pvalue",
                                      "PG.Qvalue",
                                      "PG.Cscore",
                                      "proteinID")) {
  
   key <- paste(batch, fraction, sep = "_")
  message("=== Processing subset: ", key, " ===\n")
  
  # 1) identify MS1 columns for this subset (this is the subset definition)
  pattern_ms1 <- paste0("^", batch, "_.*_", fraction, "_[0-9]+_", metric, "$")
  ms1_cols <- grep(pattern_ms1, colnames(data), value = TRUE)
  
  if (length(ms1_cols) == 0L) {
    stop("No MS1 columns found for subset ", key)
  }
  
  # derive matching MS2 / RunEv / Precursor columns
  base           <- sub(paste0("_", metric, "$"), "", ms1_cols)
  ms2_cols       <- paste0(base, "_MS2")
  runev_cols     <- paste0(base, "_RunEvCount")
  precursor_cols <- paste0(base, "_PrecursorQuant")
  
  ms2_cols       <- intersect(ms2_cols, colnames(data))
  runev_cols     <- intersect(runev_cols, colnames(data))
  precursor_cols <- intersect(precursor_cols, colnames(data))
  
  # 2) create the actual subset data frame:
  #    all rows, only metadata + cols for this subset
  subset_cols <- c(namecols, ms1_cols, ms2_cols, runev_cols, precursor_cols)
  subset_cols <- intersect(subset_cols, colnames(data))
  
  data <- data[, subset_cols, drop = FALSE]
  
  
  # 3) statistical filters (only within this subset)
  message("Removing proteins with low statistical power in subset ", key, "\n")
  
  Pvalue <- data %>% filter(PG.Pvalue < 0.05)
  message(nrow(data) - nrow(Pvalue),
          " proteins with p >= 0.05 removed\n")
  
  Qvalue <- Pvalue %>% filter(PG.Qvalue < 0.01)
  message(nrow(Pvalue) - nrow(Qvalue),
          " proteins with Q >= 0.01 removed\n")
  
  Cscore <- Qvalue %>% filter(PG.Cscore >= 3)
  message(nrow(Qvalue) - nrow(Cscore),
          " proteins with C-score < 3 removed\n")
  
  evid <- Cscore %>%
    filter(rowSums(.[, runev_cols] >= 2, na.rm = TRUE) > 0)
  message(nrow(Cscore) - nrow(evid),
          " proteins with run evidence counts < 2 removed\n")
  
  prec <- evid %>%
    filter(rowSums(.[, precursor_cols] >= 2, na.rm = TRUE) > 0)
  message(nrow(evid) - nrow(prec),
          " proteins with < 2 precursors removed\n")
  
  #intensity_threshold <- quantile(as.matrix(prec[, ms1_cols]),
  #                                0.05, na.rm = TRUE)
  #intensities <- prec %>%
  #  filter(rowSums(.[, ms1_cols] >= intensity_threshold,
  #                 na.rm = TRUE) > 0)
  #message(nrow(prec) - nrow(intensities),
  #        " proteins with MS1 in lower 5% removed\n\n")
  
  data <- prec
  
  # 3) set intensities to numeric
  data[, c(ms1_cols, ms2_cols)] <- lapply(
    data[, c(ms1_cols, ms2_cols), drop = FALSE],
    as.numeric
  )
  
  data_full_cleaned <- data
  
  # 4) subset to Gene.Names + MS1, rename columns
  message("Subsetting Gene.Names and MS1 columns for ", key, "\n")
  data_sub <- data %>%
    dplyr::select(Gene.Names, all_of(ms1_cols))
  
  colnames(data_sub)[match(ms1_cols, colnames(data_sub))] <-
    sub(paste0("_", metric, "$"), "", ms1_cols)
  
  ms1_base <- setdiff(colnames(data_sub), "Gene.Names")
  data_clean_raw <- data_sub
  
  # 5) log2 transform
  message("Log2 transform MS1 intensities\n\n")
  data_sub[, ms1_base] <- log2(pmax(data_sub[, ms1_base], 1))
  
  data_pca <- data_sub %>%
    dplyr::select(all_of(ms1_base))
  
  # 6) bpca imputation
  message("Bayesian PCA imputation for ", key, "\n")
  bpca_impute <- function(df, nPcs = 6) {
    mat <- as.matrix(df)
    fit <- pca(mat, method = "bpca", nPcs = nPcs)
    completeObs(fit)
  }
  data_bpca <- bpca_impute(data_pca, nPcs = 5)
  
  data_pca_mat <- t(data_bpca)
  
  # 7) pivot long for QC plots
  message("Pivot cleaned dataset to long format for ", key, "\n")
  data_piv <- data_sub %>%
    dplyr::select(-Gene.Names) %>%
    tibble::rownames_to_column(var = "gene_names") %>%
    tidyr::pivot_longer(
      cols = -gene_names,
      names_to = "sample",
      values_to = "intensities"
    )
  
  results  <- list(
    batch              = batch,
    fraction           = fraction,
    data_full_cleaned  = data_full_cleaned,
    data_clean_raw     = data_clean_raw,
    data_bpca          = data_bpca,
    data_pca_mat       = data_pca_mat,
    data_piv           = data_piv
  )
  return(results)
}
## Handling duplicate / missing gene names
## This dataset contains 28 duplicates in Gene.Names
## Duplicated gene names:
## -------------------------------------------------------------
## This dataset contains 7836 proteins
## -------------------------------------------------------------
## Removing contaminations
## 20 keratin proteins removed
## 1 albumin proteins removed
## 2 trypsin proteins removed
## === Processing subset: 5d_cyto ===
## Removing proteins with low statistical power in subset 5d_cyto
## 45 proteins with p >= 0.05 removed
## 0 proteins with Q >= 0.01 removed
## 0 proteins with C-score < 3 removed
## 408 proteins with run evidence counts < 2 removed
## 75 proteins with < 2 precursors removed
## Subsetting Gene.Names and MS1 columns for 5d_cyto
## Log2 transform MS1 intensities
## Bayesian PCA imputation for 5d_cyto
## Pivot cleaned dataset to long format for 5d_cyto
## === Processing subset: 5d_synap ===
## Removing proteins with low statistical power in subset 5d_synap
## 45 proteins with p >= 0.05 removed
## 0 proteins with Q >= 0.01 removed
## 0 proteins with C-score < 3 removed
## 667 proteins with run evidence counts < 2 removed
## 92 proteins with < 2 precursors removed
## Subsetting Gene.Names and MS1 columns for 5d_synap
## Log2 transform MS1 intensities
## Bayesian PCA imputation for 5d_synap
## Pivot cleaned dataset to long format for 5d_synap
## === Processing subset: 4d_cyto ===
## Removing proteins with low statistical power in subset 4d_cyto
## 45 proteins with p >= 0.05 removed
## 0 proteins with Q >= 0.01 removed
## 0 proteins with C-score < 3 removed
## 448 proteins with run evidence counts < 2 removed
## 70 proteins with < 2 precursors removed
## Subsetting Gene.Names and MS1 columns for 4d_cyto
## Log2 transform MS1 intensities
## Bayesian PCA imputation for 4d_cyto
## Pivot cleaned dataset to long format for 4d_cyto
## === Processing subset: 4d_synap ===
## Removing proteins with low statistical power in subset 4d_synap
## 45 proteins with p >= 0.05 removed
## 0 proteins with Q >= 0.01 removed
## 0 proteins with C-score < 3 removed
## 566 proteins with run evidence counts < 2 removed
## 80 proteins with < 2 precursors removed
## Subsetting Gene.Names and MS1 columns for 4d_synap
## Log2 transform MS1 intensities
## Bayesian PCA imputation for 4d_synap
## Pivot cleaned dataset to long format for 4d_synap

The subset datasets now contain

Quality Control & PCA

norm_5d_cyto <- median_sweep_pipeline(data_5d_cyto)
## Median sweep normalization for 5d_cyto
norm_5d_synap <- median_sweep_pipeline(data_5d_synap)
## Median sweep normalization for 5d_synap
norm_4d_cyto <- median_sweep_pipeline(data_4d_cyto)
## Median sweep normalization for 4d_cyto
norm_4d_synap <- median_sweep_pipeline(data_4d_synap)
## Median sweep normalization for 4d_synap
qc_5d_cyto <- plot_qc(
  data_piv_pre   = data_5d_cyto$data_piv,
  data_piv_post  = norm_5d_cyto$data_piv_swept,
  title          = "5d cyto"
)
qc_5d_cyto

qc_5d_synap <- plot_qc(
  data_piv_pre   = data_5d_synap$data_piv,
  data_piv_post  = norm_5d_synap$data_piv_swept,
  title          = "5d synap"
)
qc_5d_synap

qc_4d_cyto <- plot_qc(
  data_piv_pre   = data_4d_cyto$data_piv,
  data_piv_post  = norm_4d_cyto$data_piv_swept,
  title          = "4d cyto"
)
qc_4d_cyto

qc_4d_synap <- plot_qc(
  data_piv_pre   = data_4d_synap$data_piv,
  data_piv_post  = norm_4d_synap$data_piv_swept,
  title          = "4d synap"
)
qc_4d_synap

pca_5d_cyto <- plot_pca_comparison(
  data_pca_pre  = data_5d_cyto$data_pca_mat,
  data_pca_post = norm_5d_cyto$data_pca_swept,
  title         = "5d cyto"
)
## Importance of components:
##                            PC1     PC2     PC3     PC4      PC5      PC6
## Standard deviation     51.0163 46.3455 31.5090 28.8567 26.62518 3.42e-13
## Proportion of Variance  0.3573  0.2948  0.1363  0.1143  0.09731 0.00e+00
## Cumulative Proportion   0.3573  0.6521  0.7884  0.9027  1.00000 1.00e+00
## Importance of components:
##                            PC1     PC2     PC3     PC4     PC5       PC6
## Standard deviation     55.1355 37.9686 31.6689 30.8974 29.0844 1.187e-13
## Proportion of Variance  0.4173  0.1979  0.1377  0.1310  0.1161 0.000e+00
## Cumulative Proportion   0.4173  0.6152  0.7528  0.8839  1.0000 1.000e+00
pca_5d_cyto
## `geom_line()`: Each group consists of only one observation.
## ℹ Do you need to adjust the group aesthetic?
## `geom_line()`: Each group consists of only one observation.
## ℹ Do you need to adjust the group aesthetic?

pca_5d_synap <- plot_pca_comparison(
  data_pca_pre  = data_5d_synap$data_pca_mat,
  data_pca_post = norm_5d_synap$data_pca_swept,
  title         = "5d synap"
)
## Importance of components:
##                            PC1     PC2     PC3      PC4      PC5       PC6
## Standard deviation     52.5878 46.6017 27.3024 26.43207 25.05444 2.855e-13
## Proportion of Variance  0.3946  0.3099  0.1064  0.09968  0.08956 0.000e+00
## Cumulative Proportion   0.3946  0.7044  0.8108  0.91044  1.00000 1.000e+00
## Importance of components:
##                            PC1     PC2     PC3     PC4     PC5       PC6
## Standard deviation     53.9260 38.9400 30.5069 29.3260 28.1777 1.086e-13
## Proportion of Variance  0.4149  0.2163  0.1328  0.1227  0.1133 0.000e+00
## Cumulative Proportion   0.4149  0.6312  0.7640  0.8867  1.0000 1.000e+00
pca_5d_synap
## `geom_line()`: Each group consists of only one observation.
## ℹ Do you need to adjust the group aesthetic?
## `geom_line()`: Each group consists of only one observation.
## ℹ Do you need to adjust the group aesthetic?

pca_4d_cyto <- plot_pca_comparison(
  data_pca_pre  = data_4d_cyto$data_pca_mat,
  data_pca_post = norm_4d_cyto$data_pca_swept,
  title         = "4d cyto"
)
## Importance of components:
##                            PC1     PC2      PC3      PC4      PC5       PC6
## Standard deviation     65.4666 35.5883 26.06473 23.83509 21.21598 2.835e-13
## Proportion of Variance  0.5911  0.1747  0.09371  0.07836  0.06209 0.000e+00
## Cumulative Proportion   0.5911  0.7659  0.85955  0.93791  1.00000 1.000e+00
## Importance of components:
##                            PC1     PC2     PC3     PC4     PC5       PC6
## Standard deviation     51.8778 37.8725 35.3342 32.2467 28.9140 1.303e-13
## Proportion of Variance  0.3712  0.1978  0.1722  0.1434  0.1153 0.000e+00
## Cumulative Proportion   0.3712  0.5690  0.7413  0.8847  1.0000 1.000e+00
pca_4d_cyto
## `geom_line()`: Each group consists of only one observation.
## ℹ Do you need to adjust the group aesthetic?
## `geom_line()`: Each group consists of only one observation.
## ℹ Do you need to adjust the group aesthetic?

pca_4d_synap <- plot_pca_comparison(
  data_pca_pre  = data_4d_synap$data_pca_mat,
  data_pca_post = norm_4d_synap$data_pca_swept,
  title         = "4d synap"
)
## Importance of components:
##                            PC1     PC2     PC3     PC4     PC5       PC6
## Standard deviation     48.5087 38.9431 36.0476 32.6563 29.7738 5.214e-13
## Proportion of Variance  0.3304  0.2129  0.1825  0.1497  0.1245 0.000e+00
## Cumulative Proportion   0.3304  0.5433  0.7258  0.8755  1.0000 1.000e+00
## Importance of components:
##                          PC1     PC2     PC3     PC4     PC5       PC6
## Standard deviation     49.21 38.2824 37.3785 30.8316 29.7876 1.234e-13
## Proportion of Variance  0.34  0.2058  0.1962  0.1335  0.1246 0.000e+00
## Cumulative Proportion   0.34  0.5458  0.7419  0.8754  1.0000 1.000e+00
pca_4d_synap
## `geom_line()`: Each group consists of only one observation.
## ℹ Do you need to adjust the group aesthetic?
## `geom_line()`: Each group consists of only one observation.
## ℹ Do you need to adjust the group aesthetic?

combined_df <- bind_rows(
  as_tibble(norm_5d_cyto$data_pca_swept, rownames = "set"),
  as_tibble(norm_5d_synap$data_pca_swept, rownames = "set"),
  as_tibble(norm_4d_cyto$data_pca_swept, rownames = "set"),
  as_tibble(norm_4d_synap$data_pca_swept, rownames = "set")
)

# set as rownames again, drop column, and fill NAs with 0
combined_mat <- combined_df %>%
  column_to_rownames("set") %>%
  as.matrix()

combined_mat[is.na(combined_mat)] <- 0


pca_combined <- run_pca_analysis(data_matrix = combined_mat,
                 plot_title = "PCA complete dataset 4d + 5d",
                 na_method = "bPCA + median-sweep + missing replaced with 0")
## Importance of components:
##                            PC1     PC2      PC3      PC4      PC5      PC6
## Standard deviation     55.7254 40.5661 24.69283 21.19612 13.78064 11.24598
## Proportion of Variance  0.4259  0.2257  0.08363  0.06162  0.02605  0.01735
## Cumulative Proportion   0.4259  0.6516  0.73524  0.79687  0.82291  0.84026
##                             PC7     PC8     PC9   PC10    PC11    PC12    PC13
## Standard deviation     11.06375 9.81358 9.63166 9.1582 9.01806 8.74694 8.38317
## Proportion of Variance  0.01679 0.01321 0.01272 0.0115 0.01115 0.01049 0.00964
## Cumulative Proportion   0.85705 0.87026 0.88298 0.8945 0.90564 0.91613 0.92577
##                           PC14    PC15    PC16    PC17    PC18    PC19    PC20
## Standard deviation     8.01507 7.86970 7.74196 7.60455 7.36456 7.16044 7.07138
## Proportion of Variance 0.00881 0.00849 0.00822 0.00793 0.00744 0.00703 0.00686
## Cumulative Proportion  0.93458 0.94308 0.95130 0.95923 0.96667 0.97370 0.98056
##                           PC21    PC22    PC23      PC24
## Standard deviation     6.97961 6.92075 6.71909 5.094e-14
## Proportion of Variance 0.00668 0.00657 0.00619 0.000e+00
## Cumulative Proportion  0.98724 0.99381 1.00000 1.000e+00
pca_combined$scree_plot / pca_combined$pca_plot + plot_layout(guides = "collect")
## `geom_line()`: Each group consists of only one observation.
## ℹ Do you need to adjust the group aesthetic?

Limma & Volcano plots

limma_5d_cyto <- run_limma_subset(data_mat = norm_5d_cyto$data_swept,
                                  batch = "5d",
                                  fraction = "cyto")
## Running limma for 5d_cyto
limma_5d_synap <- run_limma_subset(data_mat = norm_5d_synap$data_swept,
                                   batch = "5d",
                                   fraction = "synap")
## Running limma for 5d_synap
limma_4d_cyto <- run_limma_subset(data_mat = norm_4d_cyto$data_swept,
                                  batch = "4d",
                                  fraction = "cyto")
## Running limma for 4d_cyto
limma_4d_synap <- run_limma_subset(data_mat = norm_4d_synap$data_swept,
                                   batch = "4d",
                                   fraction = "synap")
## Running limma for 4d_synap
# Volcano plots
volcano_5d_cyto <- plot_volcano_limma(limma_5d_cyto)
volcano_5d_cyto
volcano_5d_synap <- plot_volcano_limma(limma_5d_synap)
volcano_5d_synap
volcano_4d_cyto <- plot_volcano_limma(limma_4d_cyto)
volcano_4d_cyto
volcano_4d_synap <- plot_volcano_limma(limma_4d_synap)
volcano_4d_synap

Venn and UpSet diagrams

cyto_4d <- read.csv("limma_4d_cyto.csv",
                 row.names = 1,
                 stringsAsFactors = FALSE)

synap_4d <- read.csv("limma_4d_synap.csv",
                  row.names = 1,
                  stringsAsFactors = FALSE)


cyto_5d <- read.csv("limma_5d_cyto.csv",
                 row.names = 1,
                 stringsAsFactors = FALSE)

synap_5d <- read.csv("limma_5d_synap.csv",
                  row.names = 1,
                  stringsAsFactors = FALSE)


get_up_genes <- function(df) {
  listed <- df$first_gene[df$category == "upregulated"]
  return(listed)
}
get_down_genes <- function(df) {
  listed <- df$first_gene[df$category == "downregulated"]
  return(listed)
}

cyto_4d_up <- get_up_genes(cyto_4d)
synap_4d_up <- get_up_genes(synap_4d)
cyto_5d_up <- get_up_genes(cyto_5d)
synap_5d_up <- get_up_genes(synap_5d)

cyto_4d_down <- get_down_genes(cyto_4d)
synap_4d_down <- get_down_genes(synap_4d)
cyto_5d_down <- get_down_genes(cyto_5d)
synap_5d_down <- get_down_genes(synap_5d)
gene_sets_up <- list(
  `4d up cyto`   = cyto_4d_up,
  `4d up synap`  = synap_4d_up,
  `5d up cyto`   = cyto_5d_up,
  `5d up synap`  = synap_5d_up
)
gene_sets_down <- list(
  `4d down cyto`   = cyto_4d_down,
  `4d down synap`  = synap_4d_down,
  `5d down cyto`   = cyto_5d_down,
  `5d down synap`  = synap_5d_down
)


# Fit Euler/Venn model
fit_up <- euler(gene_sets_up)
fit_down <- euler(gene_sets_down)

# Plot
Vennup <- plot(
  fit_up,
  fills = list(fill = vennfillup, alpha = 0.75),
  labels = list(col = "black", cex = 0.8),
  edges = list(col = nsfill),
  quantities = list(type = "counts", cex = 0.7),
  main = "Overlap of upregulated genes in 4d and 5d synap and cyto"
)
Venndown <- plot(
  fit_down,
  fills = list(fill = vennfilldown, alpha = 0.75),
  labels = list(col = "black", cex = 0.8),
  edges = list(col = nsfill),
  quantities = list(type = "counts", cex = 0.7),
  main = "Overlap of downregulated genes in 4d and 5d synap and cyto"
)

Vennup

Venndown

# Convert to incidence data frame
up_df <- fromList(gene_sets_up)
down_df <- fromList(gene_sets_down)


upset_up <- upset(
  up_df,
  nsets = 4,
  nintersects = 11,
  sets = colnames(up_df),
  order.by = "degree",
  keep.order = TRUE,
  matrix.color = upfill,
  main.bar.color = screefill,
  sets.bar.color = screefill,
  shade.color = nsfill,
  mainbar.y.label = "Intersecting upregulated genes",
  sets.x.label = "Genes per subset",
  set_size.show = TRUE,
  text.scale = c(1.3, 1, 1.3, 1.1, 1.3, 1.1)
)
## Warning: `aes_string()` was deprecated in ggplot2 3.0.0.
## ℹ Please use tidy evaluation idioms with `aes()`.
## ℹ See also `vignette("ggplot2-in-packages")` for more information.
## ℹ The deprecated feature was likely used in the UpSetR package.
##   Please report the issue to the authors.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning: The `size` argument of `element_line()` is deprecated as of ggplot2 3.4.0.
## ℹ Please use the `linewidth` argument instead.
## ℹ The deprecated feature was likely used in the UpSetR package.
##   Please report the issue to the authors.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
upset_down <- upset(
  down_df,
  nsets = 4,
  nintersects = 11,
  sets = colnames(down_df),
  order.by = "degree",
  keep.order = TRUE,
  matrix.color = downfill,
  main.bar.color = screefill,
  sets.bar.color = screefill,
  shade.color = nsfill,
  mainbar.y.label = "Intersecting downregulated genes",
  sets.x.label = "Genes per subset",
  set_size.show = TRUE,
  text.scale = c(1.3, 1, 1.3, 1.1, 1.3, 1.1)
)

upset_up

upset_down